# THIS SCRIPT CODES AND CLEANS DATA FROM THE 2016 CCES (COOPERATIVE CONGRESSIONAL 
# ELECTION STUDY) COMMON CONTENT FILE. IT NEEDS TO BE USED IN CONJUNCTION WITH THE 
# FILE (AVAILABLE ON DATAVERSE) ENTITLED "CCES16_Common_OUTPUT_Feb2018_VV.RData"
library(foreign)
library(car)
library(readstata13)
library(ggplot2)
library(stargazer)
library(grid)
library(gridExtra)
library(survey)
library(dplyr)
library(lemon)
library(ggpubr)
library(reshape2)
#-----
# LOAD THE DATA. WHEN THE FILE DIALOG OPENS, SELECT 
# "CCES16_Common_OUTPUT_Feb2018_VV.RData".
# THE NEXT LINE RELIES ON load() RESTORING AN OBJECT NAMED x INTO THE WORKSPACE; 
# THAT OBJECT IS COPIED TO our.data, WHICH THE REST OF THE SCRIPT MODIFIES.
load(file = file.choose())
our.data <- x

# VOTED IN PRESIDENTIAL PRIMARIES
# CODING: 1 = VOTED IN PRESIDENTIAL PRIMARIES; 0 = DID NOT VOTE IN PRESIDENTIAL PRIMARIES.
# ANY NON-MISSING VALUE OF CL_E2016PPVM IS CODED 1 AND A MISSING VALUE IS CODED 0. 
# THIS COUNTS UN-MATCHED RESPONDENTS AS NON-VOTERS -- CREATING NO MISSING DATA POINTS.
our.data$voted.presprimaries <- ifelse(!is.na(our.data$CL_E2016PPVM), 1, 0)
# CHECK THE NEW INDICATOR AGAINST THE SOURCE VARIABLE AND AGAINST STATE
table(our.data$voted.presprimaries, our.data$CL_E2016PPVM)
table(our.data$voted.presprimaries, our.data$CL_state)
# NOTE THAT THE CES GUIDE SAYS THAT 47 PERCENT OF THOSE MATCHED TO THE VOTER FILE 
# WERE CODED AS HAVING VOTED IN THE PRESIDENTIAL PRIMARIES; THAT IS, 47 PERCENT OF 
# THOSE MATCHED IN CL_matched DID NOT HAVE A MISSING VALUE FOR CL_E2016PPVM
# TABULATE CL_E2016PPEP BY STATE AND BY THE NEW INDICATOR
table(our.data$CL_E2016PPEP, our.data$CL_state)
table(our.data$CL_E2016PPEP, our.data$voted.presprimaries)

# STATES IN 2016 THAT DIDN'T HOLD PRESIDENTIAL PRIMARIES
states.2016 <- c(
  "Alaska", "Colorado", "Hawaii", "Iowa", "Kansas",
  "Maine", "Minnesota", "North Dakota", "Nevada",
  "Oregon", "Utah", "Wyoming"
)

# VOTED IN PRESIDENTIAL PRIMARIES, DROPPING RESPONDENTS 
# FROM STATES THAT DIDN'T HOLD PRESIDENTIAL PRIMARIES
# START FROM A COPY OF voted.presprimaries, THEN BLANK OUT (SET TO NA) EVERY 
# RESPONDENT WHOSE inputstate APPEARS IN states.2016. THE TABLES BEFORE AND AFTER 
# SHOW HOW MANY CASES ARE REMOVED.
our.data$voted.presprimaries2 <- our.data$voted.presprimaries
table(our.data$voted.presprimaries2)
is.na(our.data$voted.presprimaries2) <- our.data$inputstate %in% states.2016
table(our.data$voted.presprimaries2)

# VOTED IN GENERAL ELECTION
# CODING: 1 = VALIDATED VOTING RECORD, GENERAL ELECTION; 0 = NOT VALIDATED VOTING RECORD.
# ANY NON-MISSING VALUE OF CL_E2016GVM IS CODED 1; A MISSING VALUE IS CODED 0.
table(our.data$CL_E2016GVM)
our.data$voted.general <- ifelse(!is.na(our.data$CL_E2016GVM), 1, 0)
table(our.data$voted.general, our.data$CL_E2016GVM)

# PARTISANSHIP 
# 0-TO-6 SCALE, 0 = STRONG DEMOCRAT; 6 = STRONG REPUBLICAN; NOT SURE CODED AS 
# PURE INDEPENDENTS
table(our.data$pid7)
# pid7 IS CONVERTED TO NUMERIC CODES, CODE 8 IS FOLDED INTO CODE 4, AND THE WHOLE 
# SCALE IS SHIFTED DOWN BY ONE SO THAT IT STARTS AT 0.
our.data$partisanship <- car::recode(
  var = as.numeric(our.data$pid7),
  recodes = "8 = 4"
) - 1
table(our.data$partisanship, our.data$pid7)

# REPUBLICAN
# 1 = REPUBLICAN; 0 = NOT REPUBLICAN
# VALUES 4 THROUGH 6 ON partisanship ARE CODED 1; A MISSING partisanship STAYS NA.
our.data$republican <- ifelse(
  our.data$partisanship < 4 | our.data$partisanship > 6,
  0, 1
)
table(our.data$republican, our.data$partisanship)

# DEMOCRAT
# 1 = DEMOCRAT; 0 = NOT DEMOCRAT
# VALUES 0 THROUGH 2 ON partisanship ARE CODED 1; A MISSING partisanship STAYS NA.
our.data$democrat <- ifelse(
  our.data$partisanship < 0 | our.data$partisanship > 2,
  0, 1
)
table(our.data$democrat, our.data$partisanship)

# IDEOLOGY
# 0-TO-6 SCALE, 0 = VERY LIBERAL; 6 = VERY CONSERVATIVE; 'NOT SURE' CODED AS MIDDLE OF THE ROAD
table(our.data$CC16_340a)
# CC16_340a IS CONVERTED TO NUMERIC CODES, CODE 8 IS FOLDED INTO CODE 4, AND THE 
# WHOLE SCALE IS SHIFTED DOWN BY ONE SO THAT IT STARTS AT 0.
our.data$ideology <- car::recode(
  var = as.numeric(our.data$CC16_340a),
  recodes = "8 = 4"
) - 1
table(our.data$ideology, our.data$CC16_340a)

# CONSERVATIVE
# 1 = CONSERVATIVE; 0 = NOT CONSERVATIVE
# VALUES 4 THROUGH 6 ON ideology ARE CODED 1; A MISSING ideology STAYS NA.
our.data$conservative <- ifelse(
  our.data$ideology < 4 | our.data$ideology > 6,
  0, 1
)
table(our.data$conservative, our.data$CC16_340a)

# LIBERAL
# VALUES 0 THROUGH 2 ON ideology ARE CODED 1, ALL OTHER VALUES 0; A MISSING 
# ideology STAYS NA.
our.data$liberal <- ifelse(
  our.data$ideology < 0 | our.data$ideology > 2,
  0, 1
)
table(our.data$liberal, our.data$CC16_340a)
# 1 = LIBERAL; 0 = NOT LIBERAL

# SORTED
# 1 = CONSERVATIVE REPUBLICAN OR LIBERAL DEMOCRAT (SELF-IDENTIFIED); 0 = ALL OTHERS; 
# NA = PARTISANSHIP OR IDEOLOGY IS MISSING.
# EVERYONE STARTS AT 0 AND IS SWITCHED TO 1 WHEN PARTY AND IDEOLOGY LINE UP.
our.data$sorted <- rep(0, length(our.data$ideology))
our.data$sorted[our.data$republican == 1 & our.data$conservative == 1] <- 1
table(our.data$sorted)
our.data$sorted[our.data$democrat == 1 & our.data$liberal == 1] <- 1
table(our.data$sorted)
# RESPONDENTS MISSING EITHER INGREDIENT CANNOT BE CLASSIFIED. THE ASSIGNMENTS ABOVE 
# SKIP NA INDEX VALUES, SO WITHOUT THIS STEP THOSE RESPONDENTS WOULD STAY AT 0. 
# THE TEST MUST USE partisanship (THE 0-TO-6 SCALE BUILT FROM pid7): TESTING A 
# COLUMN THAT DOES NOT EXIST YIELDS A ZERO-LENGTH INDEX AND THE ASSIGNMENT SILENTLY 
# DOES NOTHING.
our.data$sorted[is.na(our.data$partisanship) | is.na(our.data$ideology)] <- NA
table(our.data$sorted)
# CHECK THE FLAG AGAINST IDEOLOGY SEPARATELY FOR REPUBLICANS AND DEMOCRATS
table(our.data$sorted[our.data$republican == 1], our.data$ideology[our.data$republican == 1])
table(our.data$sorted[our.data$democrat == 1], our.data$ideology[our.data$democrat == 1])

# PARTY PLACEMENT: DEMOCRATIC PARTY 
# 0-TO-6 SCALE, 0 = VERY LIBERAL; 6 = VERY CONSERVATIVE; 
# NOT SURE PROVISIONALLY CODED AS NA
table(our.data$CC16_340g)
# CC16_340g IS CONVERTED TO NUMERIC CODES, CODE 8 IS SET TO NA, AND THE SCALE IS 
# SHIFTED DOWN BY ONE SO THAT IT STARTS AT 0.
our.data$demplacement <- car::recode(
  var = as.numeric(our.data$CC16_340g),
  recodes = "8 = NA"
) - 1
table(our.data$demplacement, our.data$CC16_340g)

# PARTY PLACEMENT: REPUBLICAN PARTY 
# 0-TO-6 SCALE, 0 = VERY LIBERAL; 6 = VERY CONSERVATIVE; 
# NOT SURE PROVISIONALLY CODED AS NA
table(our.data$CC16_340h)
# CC16_340h IS CONVERTED TO NUMERIC CODES, CODE 8 IS SET TO NA, AND THE SCALE IS 
# SHIFTED DOWN BY ONE SO THAT IT STARTS AT 0.
our.data$GOPplacement <- car::recode(
  var = as.numeric(our.data$CC16_340h),
  recodes = "8 = NA"
) - 1
table(our.data$GOPplacement, our.data$CC16_340h)
summary(our.data$GOPplacement)

# KNOWS PARTIES' POLICY REPUTATIONS
# 0 = DOESN'T KNOW THAT THE REPUBLICAN PARTY IS MORE CONSERVATIVE THAN THE DEMOCRATIC PARTY;
# 1 = KNOWS THAT THE REPUBLICAN PARTY IS MORE CONSERVATIVE THAN THE DEMOCRATIC PARTY. 
# CODED 1 WHEN BOTH PLACEMENTS FALL ON THE 0-TO-6 SCALE AND THE REPUBLICAN PARTY IS 
# PLACED STRICTLY TO THE RIGHT OF THE DEMOCRATIC PARTY. A MISSING PLACEMENT MAKES 
# THE COMPARISON NA, SO THE RESULT IS NA AT THIS STAGE.
our.data$knowsreputations <- ifelse(
  (our.data$demplacement >= 0 & our.data$demplacement <= 6) &
    (our.data$GOPplacement >= 0 & our.data$GOPplacement <= 6) &
    (our.data$GOPplacement > our.data$demplacement),
  1, 0
)
table(our.data$knowsreputations)
sum(table(our.data$knowsreputations))
# DK CODED AS NOT KNOWING POLICY REPUTATIONS
# A "Not sure" ANSWER ON EITHER PLACEMENT ITEM OVERRIDES THE VALUE ABOVE WITH 0. 
# THE TABLE AFTER EACH STEP SHOWS HOW THE COUNTS CHANGE.
our.data$knowsreputations[which(our.data$CC16_340h == "Not sure")] <- 0
table(our.data$knowsreputations)
our.data$knowsreputations[which(our.data$CC16_340g == "Not sure")] <- 0
table(our.data$knowsreputations)
sum(table(our.data$knowsreputations))

# PROGRAMMATIC INDEX
# 1 = MATCHED; 0.5 = PARTIALLY MATCHED; 0 = UNMATCHED 
# THE INDEX STARTS AS NA FOR EVERYONE AND IS FILLED IN ONE CATEGORY AT A TIME FROM 
# sorted AND knowsreputations. RESPONDENTS WITH NA ON EITHER INPUT MATCH NONE OF 
# THE CONDITIONS AND THEREFORE STAY NA. A TABLE IS PRINTED AFTER EACH STEP.
our.data$programmatic <- rep(NA, length(our.data$ideology))
# NOT MATCHED: NEITHER SORTED NOR AWARE OF THE PARTIES' REPUTATIONS
our.data$programmatic[which(our.data$sorted == 0 & our.data$knowsreputations == 0)] <- 0
table(our.data$programmatic)
# PARTIALLY MATCHED: EXACTLY ONE OF THE TWO INPUTS EQUALS 1
our.data$programmatic[which(our.data$sorted == 1 & our.data$knowsreputations == 0)] <- 0.5
table(our.data$programmatic)
our.data$programmatic[which(our.data$sorted == 0 & our.data$knowsreputations == 1)] <- 0.5
table(our.data$programmatic)
# MATCHED: BOTH INPUTS EQUAL 1
our.data$programmatic[which(our.data$sorted == 1 & our.data$knowsreputations == 1)] <- 1
table(our.data$programmatic)
summary(our.data$programmatic)

# SAVE DATASET 
# write.csv(our.data, "CCES2016_cleaned.csv")

